{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/peremartra/Large-Language-Model-Notebooks-Course/blob/main/3-LangChain/langchain_retrieval_agent.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "<div align=\"center\">\n",
        "<h1><a href=\"https://github.com/peremartra/Large-Language-Model-Notebooks-Course\">Learn by Doing LLM Projects</a></h1>\n",
        "    <h3>Understand And Apply Large Language Models</h3>\n",
        "    <h2>Create a Medical Assistant RAG System chat with LangChain & ChromaDB</h2>\n",
        "    <p>by <b>Pere Martra</b></p>\n",
        "</div>\n",
        "\n",
        "<div align=\"center\">\n",
        "    &nbsp;\n",
        "    <a target=\"_blank\" href=\"https://www.linkedin.com/in/pere-martra/\"><img src=\"https://img.shields.io/badge/style--5eba00.svg?label=LinkedIn&logo=linkedin&style=social\"></a>\n",
        "    \n",
        "</div>\n",
        "\n"
      ],
      "metadata": {
        "id": "f3cvnSVemTSU"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "#Installing libraries & Loading Dataset"
      ],
      "metadata": {
        "id": "KNW-LniD2I2o"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pva9ehKXUpU2",
        "outputId": "3ea520fe-6bcf-445c-bc56-dbb1e2a9bbfb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m803.6/803.6 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m230.3/230.3 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.1/225.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.9/76.9 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "llmx 0.0.15a0 requires cohere, which is not installed.\n",
            "tensorflow-probability 0.22.0 requires typing-extensions<4.6.0, but you have typing-extensions 4.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mCollecting langchainhub==0.1.14\n",
            "  Downloading langchainhub-0.1.14-py3-none-any.whl (3.4 kB)\n",
            "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchainhub==0.1.14) (2.31.0)\n",
            "Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub==0.1.14)\n",
            "  Downloading types_requests-2.31.0.20240125-py3-none-any.whl (14 kB)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub==0.1.14) (3.3.2)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub==0.1.14) (3.6)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub==0.1.14) (2.0.7)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub==0.1.14) (2023.11.17)\n",
            "Installing collected packages: types-requests, langchainhub\n",
            "Successfully installed langchainhub-0.1.14 types-requests-2.31.0.20240125\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.1/507.1 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.0/509.0 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.6/60.6 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.7/40.7 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.4/6.4 MB\u001b[0m \u001b[31m83.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.9/57.9 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.6/105.6 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m698.9/698.9 kB\u001b[0m \u001b[31m48.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m64.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m33.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m78.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m72.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "lida 0.0.10 requires kaleido, which is not installed.\n",
            "lida 0.0.10 requires python-multipart, which is not installed.\u001b[0m\u001b[31m\n",
            "\u001b[0m"
          ]
        }
      ],
      "source": [
        "!pip install -q langchain==0.1.4\n",
        "!pip install -q langchain-openai==0.0.5\n",
        "!pip install langchainhub==0.1.14\n",
        "#!pip install --upgrade -q tiktoken==0.5.2\n",
        "!pip install -q datasets==2.16.1\n",
        "!pip install -q chromadb==0.4.22"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install -q openai==1.10.0"
      ],
      "metadata": {
        "id": "ijC0kDQM8e-d"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "We will download the dataset from the Hugging Face datasets library. It's a dataset with information about diseases."
      ],
      "metadata": {
        "id": "KEwdHiGFkK_S"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 237,
          "referenced_widgets": [
            "bc26fc0325be4917b221a832da08c4d2",
            "621edf1b193c4918a42ea2b773babcdf",
            "8b14260b9fa1485f983050aaa3dfe00c",
            "60428c7c0b3644ff8da9c72d7e69a4dd",
            "554df2549be24c1ea95cb5dc54c5aacf",
            "b81e56bbbeae45c9a476d15a0df77831",
            "dd438c2519de4e5398341301e0539e66",
            "880170b293ec45dca6ec2a0b10ea367a",
            "164a608a506941dbb638d928cf79720b",
            "f2523dc2263d493c999475c5c98825d3",
            "532af86698554579b9b80fcd4eb3cc88",
            "2824f1c25cd34b528f696fbb46e9212e",
            "17afad8a68244e7c984cb19b81b1f3a7",
            "835caad936154f909ca2fb7958c8dfa2",
            "fc6aa57d36a84c32becf2724f3bd69cc",
            "88e3e8cef8c84fbda28fe5b9d60d7ea4",
            "c8950fef248e4d758afabaa72cb565b7",
            "368052b9864d426ebf71d34371a825a5",
            "ed202fe8e1e84ff1bbe7666155c3ade1",
            "f3e77cfaa89f4e2498019a77414e370c",
            "66f815a7fbb6407a8d25ca5e58931ef0",
            "26bbfa6fa34542d9a402c0d3373375bd",
            "4bfa22635bee48a09d50eb723835e5df",
            "cd66a27f359e46cc8344e7fb2d8b4a8f",
            "d956eaeff2ca424388adfcf2914dbdfe",
            "69234e6ff09f4bfa80965495c66346e5",
            "7bb305919c4846ca8af19f39aecbb662",
            "8de4268d916a438eaa0b82e9a7e4f77b",
            "337aad5247624adab2661c91842d27bd",
            "98676a2ad920453094ff0bc0120f9a04",
            "670e6bf31e874fb58a893e4e3ef1dbd7",
            "c6df912bd8414bd58bf23c5706706a3a",
            "1ceeaad8e55140a9a2175e85774acc0e"
          ]
        },
        "id": "laSDMjqQXuj-",
        "outputId": "fb736631-f1a9-4af2-e09d-d8cef31849bb"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading readme:   0%|          | 0.00/233 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "bc26fc0325be4917b221a832da08c4d2"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Downloading data:   0%|          | 0.00/22.5M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "2824f1c25cd34b528f696fbb46e9212e"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Generating train split: 0 examples [00:00, ? examples/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "4bfa22635bee48a09d50eb723835e5df"
            }
          },
          "metadata": {}
        }
      ],
      "source": [
        "from datasets import load_dataset\n",
        "\n",
        "data = load_dataset(\"keivalya/MedQuad-MedicalQnADataset\", split='train')\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 363
        },
        "id": "JnWZTcJiXzor",
        "outputId": "2e66bd26-a4eb-49d9-8041-a485685efda8"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "             qtype                                           Question  \\\n",
              "0   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   \n",
              "1         symptoms  What are the symptoms of Lymphocytic Choriomen...   \n",
              "2   susceptibility  Who is at risk for Lymphocytic Choriomeningiti...   \n",
              "3  exams and tests  How to diagnose Lymphocytic Choriomeningitis (...   \n",
              "4        treatment  What are the treatments for Lymphocytic Chorio...   \n",
              "5       prevention  How to prevent Lymphocytic Choriomeningitis (L...   \n",
              "6      information          What is (are) Parasites - Cysticercosis ?   \n",
              "7   susceptibility    Who is at risk for Parasites - Cysticercosis? ?   \n",
              "8  exams and tests        How to diagnose Parasites - Cysticercosis ?   \n",
              "9        treatment  What are the treatments for Parasites - Cystic...   \n",
              "\n",
              "                                              Answer  \n",
              "0  LCMV infections can occur after exposure to fr...  \n",
              "1  LCMV is most commonly recognized as causing ne...  \n",
              "2  Individuals of all ages who come into contact ...  \n",
              "3  During the first phase of the disease, the mos...  \n",
              "4  Aseptic meningitis, encephalitis, or meningoen...  \n",
              "5  LCMV infection can be prevented by avoiding co...  \n",
              "6  Cysticercosis is an infection caused by the la...  \n",
              "7  Cysticercosis is an infection caused by the la...  \n",
              "8  If you think that you may have cysticercosis, ...  \n",
              "9  Some people with cysticercosis do not need to ...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-b7240d4d-f488-4910-8b94-c372041eaf77\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>qtype</th>\n",
              "      <th>Question</th>\n",
              "      <th>Answer</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>susceptibility</td>\n",
              "      <td>Who is at risk for Lymphocytic Choriomeningiti...</td>\n",
              "      <td>LCMV infections can occur after exposure to fr...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>symptoms</td>\n",
              "      <td>What are the symptoms of Lymphocytic Choriomen...</td>\n",
              "      <td>LCMV is most commonly recognized as causing ne...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>susceptibility</td>\n",
              "      <td>Who is at risk for Lymphocytic Choriomeningiti...</td>\n",
              "      <td>Individuals of all ages who come into contact ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>exams and tests</td>\n",
              "      <td>How to diagnose Lymphocytic Choriomeningitis (...</td>\n",
              "      <td>During the first phase of the disease, the mos...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>treatment</td>\n",
              "      <td>What are the treatments for Lymphocytic Chorio...</td>\n",
              "      <td>Aseptic meningitis, encephalitis, or meningoen...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>prevention</td>\n",
              "      <td>How to prevent Lymphocytic Choriomeningitis (L...</td>\n",
              "      <td>LCMV infection can be prevented by avoiding co...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>information</td>\n",
              "      <td>What is (are) Parasites - Cysticercosis ?</td>\n",
              "      <td>Cysticercosis is an infection caused by the la...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>susceptibility</td>\n",
              "      <td>Who is at risk for Parasites - Cysticercosis? ?</td>\n",
              "      <td>Cysticercosis is an infection caused by the la...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>exams and tests</td>\n",
              "      <td>How to diagnose Parasites - Cysticercosis ?</td>\n",
              "      <td>If you think that you may have cysticercosis, ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>treatment</td>\n",
              "      <td>What are the treatments for Parasites - Cystic...</td>\n",
              "      <td>Some people with cysticercosis do not need to ...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-b7240d4d-f488-4910-8b94-c372041eaf77')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-b7240d4d-f488-4910-8b94-c372041eaf77 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-b7240d4d-f488-4910-8b94-c372041eaf77');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-284b511c-9139-43b2-a8d0-195dbb20f290\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-284b511c-9139-43b2-a8d0-195dbb20f290')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-284b511c-9139-43b2-a8d0-195dbb20f290 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ]
          },
          "metadata": {},
          "execution_count": 3
        }
      ],
      "source": [
        "data = data.to_pandas()\n",
        "data.head(10)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "data = data[0:100]"
      ],
      "metadata": {
        "id": "hf7RQa6B5xpx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "As you can see, the medical information in the dataset is well-organized, and to someone like me, who is not an expert in the field, it appears to be quite valuable. This information could be a useful addition to any general medicine book to support primary care doctors."
      ],
      "metadata": {
        "id": "4FhUslovtiqn"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Load the langchain libraries to load the document."
      ],
      "metadata": {
        "id": "2sLkrHF6lWhM"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.document_loaders import DataFrameLoader\n",
        "from langchain.vectorstores import Chroma"
      ],
      "metadata": {
        "id": "cCBAlIb596wZ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "The Document is in the Answer column, and the others columns are Metadata."
      ],
      "metadata": {
        "id": "nGCkVX6xldOR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "df_loader = DataFrameLoader(data, page_content_column=\"Answer\")\n"
      ],
      "metadata": {
        "id": "JZX8SaTe99Uf"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "df_document = df_loader.load()\n",
        "display(df_document[:2])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 486
        },
        "id": "HDWnTqRY-IDr",
        "outputId": "1ed8b46a-fa41-4f54-b6eb-4858c90021b6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "[Document(page_content='LCMV infections can occur after exposure to fresh urine, droppings, saliva, or nesting materials from infected rodents.  Transmission may also occur when these materials are directly introduced into broken skin, the nose, the eyes, or the mouth, or presumably, via the bite of an infected rodent. Person-to-person transmission has not been reported, with the exception of vertical transmission from infected mother to fetus, and rarely, through organ transplantation.', metadata={'qtype': 'susceptibility', 'Question': 'Who is at risk for Lymphocytic Choriomeningitis (LCM)? ?'}),\n",
              " Document(page_content='LCMV is most commonly recognized as causing neurological disease, as its name implies, though infection without symptoms or mild febrile illnesses are more common clinical manifestations. \\n                \\nFor infected persons who do become ill, onset of symptoms usually occurs 8-13 days after exposure to the virus as part of a biphasic febrile illness. This initial phase, which may last as long as a week, typically begins with any or all of the following symptoms: fever, malaise, lack of appetite, muscle aches, headache, nausea, and vomiting. Other symptoms appearing less frequently include sore throat, cough, joint pain, chest pain, testicular pain, and parotid (salivary gland) pain. \\n                \\nFollowing a few days of recovery, a second phase of illness may occur. Symptoms may consist of meningitis (fever, headache, stiff neck, etc.), encephalitis (drowsiness, confusion, sensory disturbances, and/or motor abnormalities, such as paralysis), or meningoencephalitis (inflammation of both the brain and meninges). LCMV has also been known to cause acute hydrocephalus (increased fluid on the brain), which often requires surgical shunting to relieve increased intracranial pressure. In rare instances, infection results in myelitis (inflammation of the spinal cord) and presents with symptoms such as muscle weakness, paralysis, or changes in body sensation. An association between LCMV infection and myocarditis (inflammation of the heart muscles) has been suggested. \\n                \\nPrevious observations show that most patients who develop aseptic meningitis or encephalitis due to LCMV survive. No chronic infection has been described in humans, and after the acute phase of illness, the virus is cleared from the body. However, as in all infections of the central nervous system, particularly encephalitis, temporary or permanent neurological damage is possible. Nerve deafness and arthritis have been reported. \\n                \\nWomen who become infected with LCMV during pregnancy may pass the infection on to the fetus. Infections occurring during the first trimester may result in fetal death and pregnancy termination, while in the second and third trimesters, birth defects can develop. Infants infected In utero can have many serious and permanent birth defects, including vision problems, mental retardation, and hydrocephaly (water on the brain). Pregnant women may recall a flu-like illness during pregnancy, or may not recall any illness. \\n                \\nLCM is usually not fatal. In general, mortality is less than 1%.', metadata={'qtype': 'symptoms', 'Question': 'What are the symptoms of Lymphocytic Choriomeningitis (LCM) ?'})]"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "We can chunk the documents. The size to which we want to split the document is a design decision. The larger it is, the larger the prompt will be, and the slower the Model's response process.\n",
        "\n",
        "We also need to consider the maximum prompt size and ensure that the document does not exceed it."
      ],
      "metadata": {
        "id": "zPuTbSsXl3uF"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from langchain.text_splitter import CharacterTextSplitter"
      ],
      "metadata": {
        "id": "8wogWV1_-lxh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "text_splitter = CharacterTextSplitter(chunk_size=1250, chunk_overlap=100)\n",
        "texts = text_splitter.split_documents(df_document)\n"
      ],
      "metadata": {
        "id": "kg7SRkqO-f9x"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "These warnings we see are because it can't perform the partition of the required size. This is because it waits for a page break to divide the text and does so when possible."
      ],
      "metadata": {
        "id": "y0dn9EfrKoeW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "first_doc = texts[1]\n",
        "print(first_doc.page_content)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "XaLWV_TzASSB",
        "outputId": "711c146c-1ffe-41a3-cfae-d543413543d3"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "LCMV is most commonly recognized as causing neurological disease, as its name implies, though infection without symptoms or mild febrile illnesses are more common clinical manifestations. \n",
            "                \n",
            "For infected persons who do become ill, onset of symptoms usually occurs 8-13 days after exposure to the virus as part of a biphasic febrile illness. This initial phase, which may last as long as a week, typically begins with any or all of the following symptoms: fever, malaise, lack of appetite, muscle aches, headache, nausea, and vomiting. Other symptoms appearing less frequently include sore throat, cough, joint pain, chest pain, testicular pain, and parotid (salivary gland) pain. \n",
            "                \n",
            "Following a few days of recovery, a second phase of illness may occur. Symptoms may consist of meningitis (fever, headache, stiff neck, etc.), encephalitis (drowsiness, confusion, sensory disturbances, and/or motor abnormalities, such as paralysis), or meningoencephalitis (inflammation of both the brain and meninges). LCMV has also been known to cause acute hydrocephalus (increased fluid on the brain), which often requires surgical shunting to relieve increased intracranial pressure. In rare instances, infection results in myelitis (inflammation of the spinal cord) and presents with symptoms such as muscle weakness, paralysis, or changes in body sensation. An association between LCMV infection and myocarditis (inflammation of the heart muscles) has been suggested. \n",
            "                \n",
            "Previous observations show that most patients who develop aseptic meningitis or encephalitis due to LCMV survive. No chronic infection has been described in humans, and after the acute phase of illness, the virus is cleared from the body. However, as in all infections of the central nervous system, particularly encephalitis, temporary or permanent neurological damage is possible. Nerve deafness and arthritis have been reported. \n",
            "                \n",
            "Women who become infected with LCMV during pregnancy may pass the infection on to the fetus. Infections occurring during the first trimester may result in fetal death and pregnancy termination, while in the second and third trimesters, birth defects can develop. Infants infected In utero can have many serious and permanent birth defects, including vision problems, mental retardation, and hydrocephaly (water on the brain). Pregnant women may recall a flu-like illness during pregnancy, or may not recall any illness. \n",
            "                \n",
            "LCM is usually not fatal. In general, mortality is less than 1%.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "B2_Pt7N6Zg2X"
      },
      "source": [
        "### Initialize the Embedding Model and Vector DB"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "We load the text-embedding-ada-002 model from OpenAI."
      ],
      "metadata": {
        "id": "n8ROb8oMnRLD"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from getpass import getpass\n",
        "OPENAI_API_KEY = getpass(\"OpenAI API Key: \")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "F_Dn06xGwjKP",
        "outputId": "96d053ce-bcc5-4792-c183-0636f856f83f"
      },
      "execution_count": null,
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "OpenAI API Key: ··········\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "U57x2_87YSpb"
      },
      "outputs": [],
      "source": [
        "from langchain_openai import OpenAIEmbeddings\n",
        "\n",
        "model_name = 'text-embedding-ada-002'\n",
        "#model_name = 'text-embedding-3-small'\n",
        "\n",
        "embed = OpenAIEmbeddings(\n",
        "    model=model_name,\n",
        "    openai_api_key=OPENAI_API_KEY\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "The execution of this cell may take 3 to 5 minutes. If you want it to be faster, you can reduce the number of records in the dataset."
      ],
      "metadata": {
        "id": "cgTCwF7UMyNW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "directory_cdb = '/content/drive/MyDrive/chromadb'\n",
        "chroma_db = Chroma.from_documents(\n",
        "    df_document, embed, persist_directory=directory_cdb\n",
        ")"
      ],
      "metadata": {
        "id": "SEhQMQ8eCMj8"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "We are going to create three objects.\n",
        "\n",
        "* The language model, which can be any of those from OpenAI, the most common being gpt-3.5.\n",
        "* The memory, responsible for keeping the prompt with all the necessary history.\n",
        "* The retrieval, used to obtain information stored in ChromaDB."
      ],
      "metadata": {
        "id": "KhjV-T8GoarF"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "zMRs9Klic5-Y"
      },
      "outputs": [],
      "source": [
        "from langchain.chat_models import ChatOpenAI\n",
        "from langchain_openai import OpenAI\n",
        "from langchain.chains.conversation.memory import ConversationBufferWindowMemory\n",
        "from langchain.chains import RetrievalQA\n",
        "\n",
        "llm=OpenAI(openai_api_key=OPENAI_API_KEY,\n",
        "           temperature=0.0)\n",
        "\n",
        "conversational_memory = ConversationBufferWindowMemory(\n",
        "    memory_key='chat_history',\n",
        "    k=4, #Number of messages stored in memory\n",
        "    return_messages=True #Must return the messages in the response.\n",
        ")\n",
        "\n",
        "qa = RetrievalQA.from_chain_type(\n",
        "    llm=llm,\n",
        "    chain_type=\"stuff\",\n",
        "    retriever=chroma_db.as_retriever()\n",
        ")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "We can try the isolated Retrieval to see if the information it returns is relevant.\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "ergrieE4o8qu"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 52
        },
        "id": "LaYSq0V-dxHw",
        "outputId": "43ef40a5-0fc0-46f8-8cfe-0cecbfdcf890"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "' The main symptom of LCM is a biphasic febrile illness, which includes symptoms such as fever, malaise, lack of appetite, muscle aches, headache, nausea, and vomiting.'"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 59
        }
      ],
      "source": [
        "qa.run(\"What is the main symptom of LCM?\")"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Perfect! The information returned is exactly what we desired.\n",
        "\n",
        "## Creating the Agent."
      ],
      "metadata": {
        "id": "pf9MXPeipEBO"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "FwCYrS4duqBW"
      },
      "outputs": [],
      "source": [
        "from langchain.agents import Tool, AgentExecutor\n",
        "\n",
        "#Defining the list of tool objects to be used by LangChain.\n",
        "tools = [\n",
        "    Tool(\n",
        "        name='Medical KB',\n",
        "        func=qa.run,\n",
        "        description=(\n",
        "            'use this tool when answering medical knowledge queries to get '\n",
        "            'more information about the topic'\n",
        "        )\n",
        "    )\n",
        "]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "JaKTzPUEvOoy"
      },
      "outputs": [],
      "source": [
        "from langchain.agents import initialize_agent, create_react_agent\n",
        "from langchain import hub\n",
        "\n",
        "prompt = hub.pull(\"hwchase17/react-chat\")\n",
        "agent = create_react_agent(\n",
        "    #agent='chat-conversational-react-description',\n",
        "    tools=tools,\n",
        "    llm=llm,\n",
        "    prompt=prompt\n",
        "    #verbose=True,\n",
        "    #max_iterations=3,\n",
        "    #early_stopping_method='generate',\n",
        "    #memory=conversational_memory\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Create an agent executor by passing in the agent and tools\n",
        "agent_executor = AgentExecutor(agent=agent,\n",
        "                               tools=tools,\n",
        "                               verbose=True,\n",
        "                               memory=conversational_memory,\n",
        "                               max_iterations=30,\n",
        "                               max_execution_time=600,\n",
        "                               #early_stopping_method='generate',\n",
        "                               handle_parsing_errors=True\n",
        "                               )"
      ],
      "metadata": {
        "id": "5sjTL1EU1vNW"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "IlxUBWKcvzeP"
      },
      "source": [
        "### Using the Conversational Agent"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ZZapCP4Pv2kz"
      },
      "source": [
        "To make queries we simply call the `agent` directly.\n",
        "\n",
        "First i will try a order not related to the Medical field."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "85vipqC02deV",
        "outputId": "7f83f0b6-e65d-45fc-9dd3-449bd0d59a09"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
            "\u001b[32;1m\u001b[1;3mThought: Do I need to use a tool? Yes\n",
            "Action: Medical KB\n",
            "Action Input: Area of square\u001b[0m\u001b[36;1m\u001b[1;3m I don't know.\u001b[0m\u001b[32;1m\u001b[1;3m Do I need to use a tool? No\n",
            "Final Answer: The area of a square with sides of 2 units is 4 square units.\u001b[0m\n",
            "\n",
            "\u001b[1m> Finished chain.\u001b[0m\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'input': 'Give me the area of square of 2x2',\n",
              " 'chat_history': [],\n",
              " 'output': 'The area of a square with sides of 2 units is 4 square units.'}"
            ]
          },
          "metadata": {},
          "execution_count": 64
        }
      ],
      "source": [
        "agent_executor.invoke({\"input\": \"Give me the area of square of 2x2\"})"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Perfect, the model has responded without accessing the configured knowledge database.\n",
        "\n",
        "Now I will try with a question that is also not related to health."
      ],
      "metadata": {
        "id": "8suStMR7G11e"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "agent_executor.invoke({\"input\": \"Do you know who is Clark Kent?\"})"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9YEsxEuCVgMv",
        "outputId": "72db9e1e-d716-42f1-fd02-9fbd78c83ba2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
            "\u001b[32;1m\u001b[1;3m\n",
            "Thought: Do I need to use a tool? Yes\n",
            "Action: Medical KB\n",
            "Action Input: Clark Kent\u001b[0m\u001b[36;1m\u001b[1;3m I don't know.\u001b[0m\u001b[32;1m\u001b[1;3mDo I need to use a tool? No\n",
            "Final Answer: Clark Kent is the secret identity of the superhero Superman. He is a journalist for the Daily Planet and lives in Metropolis.\u001b[0m\n",
            "\n",
            "\u001b[1m> Finished chain.\u001b[0m\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'input': 'Do you know who is Clark Kent?',\n",
              " 'chat_history': [],\n",
              " 'output': 'Clark Kent is the secret identity of the superhero Superman. He is a journalist for the Daily Planet and lives in Metropolis.'}"
            ]
          },
          "metadata": {},
          "execution_count": 70
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "It has not accessed either, as the model has been able to identify that it is not a question related to the database that LangChain provides.\n",
        "\n",
        "Now it's time to try with a question related to Medicine. Let's see if the model can understand that it should first look for information in the vector database at its disposal."
      ],
      "metadata": {
        "id": "lyd0XJ3CHjVb"
      }
    },
    {
      "cell_type": "code",
      "source": [
        " agent_executor.memory.clear()"
      ],
      "metadata": {
        "id": "Wtwgfuy158LV"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RJoAhy76vzAB",
        "outputId": "ebc07d40-4d39-4ea5-b2fa-b9b2d67f6433"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
            "\u001b[32;1m\u001b[1;3m\n",
            "Thought: Do I need to use a tool? Yes\n",
            "Action: Medical KB\n",
            "Action Input: Botulism\u001b[0m\u001b[36;1m\u001b[1;3m Botulism is a rare but serious paralytic illness caused by a nerve toxin produced by certain bacteria. It can be contracted through contaminated food, wounds, or ingestion of bacterial spores. Symptoms include muscle paralysis, difficulty swallowing, and respiratory failure. Treatment includes antitoxin, supportive care, and removal of the source of the toxin.\u001b[0m\u001b[32;1m\u001b[1;3mDo I need to use a tool? No\n",
            "Final Answer: To confirm the diagnosis, you can perform a physical exam and order laboratory tests, such as a stool or blood test, to detect the presence of the bacteria or its toxin. It is important to act quickly, as botulism can be life-threatening if left untreated.\u001b[0m\n",
            "\n",
            "\u001b[1m> Finished chain.\u001b[0m\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'input': 'I have a patient that can have Botulism,\\nhow can I confirm the diagnose?',\n",
              " 'chat_history': [],\n",
              " 'output': 'To confirm the diagnosis, you can perform a physical exam and order laboratory tests, such as a stool or blood test, to detect the presence of the bacteria or its toxin. It is important to act quickly, as botulism can be life-threatening if left untreated.'}"
            ]
          },
          "metadata": {},
          "execution_count": 72
        }
      ],
      "source": [
        "agent_executor.invoke({\"input\": \"\"\"I have a patient that can have Botulism,\n",
        "how can I confirm the diagnose?\"\"\"})"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Perfect, the most important thing for us is that it has been able to identify that it should go to the medical database to search for information about the symptoms."
      ],
      "metadata": {
        "id": "8lsaoF8nJNnR"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "mQeicHTj2pmY",
        "outputId": "fd167078-136c-4f9e-8eb5-e0abe713ad35"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "\n",
            "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
            "\u001b[32;1m\u001b[1;3m\n",
            "Thought: Do I need to use a tool? No\n",
            "Final Answer: Yes, botulism is a serious illness that can be life-threatening if left untreated. It is important to seek medical attention and confirm the diagnosis as soon as possible.\u001b[0m\n",
            "\n",
            "\u001b[1m> Finished chain.\u001b[0m\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'input': 'Is this an important illness?',\n",
              " 'chat_history': [HumanMessage(content='I have a patient that can have Botulism,\\nhow can I confirm the diagnose?'),\n",
              "  AIMessage(content='To confirm the diagnosis, you can perform a physical exam and order laboratory tests, such as a stool or blood test, to detect the presence of the bacteria or its toxin. It is important to act quickly, as botulism can be life-threatening if left untreated.')],\n",
              " 'output': 'Yes, botulism is a serious illness that can be life-threatening if left untreated. It is important to seek medical attention and confirm the diagnosis as soon as possible.'}"
            ]
          },
          "metadata": {},
          "execution_count": 73
        }
      ],
      "source": [
        "agent_executor.invoke({\"input\": \"Is this an important illness?\"})"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "And the memory works perfectly. We can maintain a conversation, taking into account that the model knows the previous questions and answers.\n",
        "\n",
        "# Conclusions.\n",
        "The experiment has been a small success. The Vectorial database has been configured and filled with information from the dataset. A LangChain agent has been created, and it has been able to retrieve information from the database only when necessary. Don't forget that our ChatBot has memory.\n",
        "\n",
        "All of this in just a few lines of code!\n"
      ],
      "metadata": {
        "id": "gdyhyd6nJnwN"
      }
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ykg5TYA033yR"
      },
      "source": [
        "---"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm",
      "gpuType": "V100",
      "include_colab_link": true
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.9.12"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "bc26fc0325be4917b221a832da08c4d2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_621edf1b193c4918a42ea2b773babcdf",
              "IPY_MODEL_8b14260b9fa1485f983050aaa3dfe00c",
              "IPY_MODEL_60428c7c0b3644ff8da9c72d7e69a4dd"
            ],
            "layout": "IPY_MODEL_554df2549be24c1ea95cb5dc54c5aacf"
          }
        },
        "621edf1b193c4918a42ea2b773babcdf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b81e56bbbeae45c9a476d15a0df77831",
            "placeholder": "​",
            "style": "IPY_MODEL_dd438c2519de4e5398341301e0539e66",
            "value": "Downloading readme: 100%"
          }
        },
        "8b14260b9fa1485f983050aaa3dfe00c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_880170b293ec45dca6ec2a0b10ea367a",
            "max": 233,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_164a608a506941dbb638d928cf79720b",
            "value": 233
          }
        },
        "60428c7c0b3644ff8da9c72d7e69a4dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f2523dc2263d493c999475c5c98825d3",
            "placeholder": "​",
            "style": "IPY_MODEL_532af86698554579b9b80fcd4eb3cc88",
            "value": " 233/233 [00:00&lt;00:00, 12.6kB/s]"
          }
        },
        "554df2549be24c1ea95cb5dc54c5aacf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b81e56bbbeae45c9a476d15a0df77831": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "dd438c2519de4e5398341301e0539e66": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "880170b293ec45dca6ec2a0b10ea367a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "164a608a506941dbb638d928cf79720b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "f2523dc2263d493c999475c5c98825d3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "532af86698554579b9b80fcd4eb3cc88": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "2824f1c25cd34b528f696fbb46e9212e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_17afad8a68244e7c984cb19b81b1f3a7",
              "IPY_MODEL_835caad936154f909ca2fb7958c8dfa2",
              "IPY_MODEL_fc6aa57d36a84c32becf2724f3bd69cc"
            ],
            "layout": "IPY_MODEL_88e3e8cef8c84fbda28fe5b9d60d7ea4"
          }
        },
        "17afad8a68244e7c984cb19b81b1f3a7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c8950fef248e4d758afabaa72cb565b7",
            "placeholder": "​",
            "style": "IPY_MODEL_368052b9864d426ebf71d34371a825a5",
            "value": "Downloading data: 100%"
          }
        },
        "835caad936154f909ca2fb7958c8dfa2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ed202fe8e1e84ff1bbe7666155c3ade1",
            "max": 22466890,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f3e77cfaa89f4e2498019a77414e370c",
            "value": 22466890
          }
        },
        "fc6aa57d36a84c32becf2724f3bd69cc": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_66f815a7fbb6407a8d25ca5e58931ef0",
            "placeholder": "​",
            "style": "IPY_MODEL_26bbfa6fa34542d9a402c0d3373375bd",
            "value": " 22.5M/22.5M [00:03&lt;00:00, 8.41MB/s]"
          }
        },
        "88e3e8cef8c84fbda28fe5b9d60d7ea4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c8950fef248e4d758afabaa72cb565b7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "368052b9864d426ebf71d34371a825a5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ed202fe8e1e84ff1bbe7666155c3ade1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f3e77cfaa89f4e2498019a77414e370c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "66f815a7fbb6407a8d25ca5e58931ef0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "26bbfa6fa34542d9a402c0d3373375bd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4bfa22635bee48a09d50eb723835e5df": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_cd66a27f359e46cc8344e7fb2d8b4a8f",
              "IPY_MODEL_d956eaeff2ca424388adfcf2914dbdfe",
              "IPY_MODEL_69234e6ff09f4bfa80965495c66346e5"
            ],
            "layout": "IPY_MODEL_7bb305919c4846ca8af19f39aecbb662"
          }
        },
        "cd66a27f359e46cc8344e7fb2d8b4a8f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8de4268d916a438eaa0b82e9a7e4f77b",
            "placeholder": "​",
            "style": "IPY_MODEL_337aad5247624adab2661c91842d27bd",
            "value": "Generating train split: "
          }
        },
        "d956eaeff2ca424388adfcf2914dbdfe": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_98676a2ad920453094ff0bc0120f9a04",
            "max": 1,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_670e6bf31e874fb58a893e4e3ef1dbd7",
            "value": 1
          }
        },
        "69234e6ff09f4bfa80965495c66346e5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c6df912bd8414bd58bf23c5706706a3a",
            "placeholder": "​",
            "style": "IPY_MODEL_1ceeaad8e55140a9a2175e85774acc0e",
            "value": " 16407/0 [00:00&lt;00:00, 44608.46 examples/s]"
          }
        },
        "7bb305919c4846ca8af19f39aecbb662": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8de4268d916a438eaa0b82e9a7e4f77b": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "337aad5247624adab2661c91842d27bd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "98676a2ad920453094ff0bc0120f9a04": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "20px"
          }
        },
        "670e6bf31e874fb58a893e4e3ef1dbd7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c6df912bd8414bd58bf23c5706706a3a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1ceeaad8e55140a9a2175e85774acc0e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}